Environmental Data

In this file, the goal is to quantify the historical cloud cover and wind speeds in the planned flight boxes and to assess the implications for BioSCape operations.

To assess potential cloud cover during the BioSCape campaign, here we use cloud cover data from the MODIS aqua (https://developers.google.com/earth-engine/datasets/catalog/MODIS_061_MOD09GA) and terra (https://developers.google.com/earth-engine/datasets/catalog/MODIS_061_MOD09GA) daily surface reflectance products with a 1km resolution. The QA metadata for these products assigns each raster cell one of 4 cloud categories: Clear, Cloudy, Mixed, or “Not set, assumed clear”. Here, we consider any raster cell categorized as “cloudy” to be cloud covered and other categories to be cloud-free (Figure 1). To assess potential wind speeds, we used wind speed data from ERA5. Wind data are hourly and have a 0.25 degree resolution.

# Load required packages

# Load packages
  library(rgee)
  library(targets)
  library(sf)
  library(terra)
  library(raster)
  library(tidyverse)
  library(lubridate)
  library(leaflet)
  library(ggplot2)
  library(ggpubr)
  library(leafem)
  library(plotly)



#Load required data

  # get domain
    domain <- st_read("data/output/domain.gpkg",
                      quiet = TRUE)
    domain_sf <- domain
  
  # get flight boxes
    boxes <- st_read("data/flight_planning/20221026_flightboxes.gpkg",
                     quiet = TRUE)
    
    boxes$id <- 1:20 # need a unique ID to make things easier
    
    boxes$ordered_id[c(11,10,15,14,20,18,2,13,17,16,4,1,6,8,3,19,5,9,7,12)] <- 1:20
    
    boxes_sf <- boxes
    
  # Download table from drive (to see the code underlying this or to update the data, see the file "R/mock_flights_earth_engine.R")
    googledrive::drive_download(file = "EMMA/cloud_stats.csv",
                                path = "data/test_cloud_stats.csv",
                                overwrite = TRUE)
    
    cloud_table <- read.csv("data/test_cloud_stats.csv")

  # modis clouds

    cloud_table %>%
      mutate(year = year(date),
             month = month(date),
             day = day(date),
             day_of_year = yday(date)) -> cloud_table
  
  #Load era5 wind data
    
    era5_wind_table <- readRDS("data/output/era_wind_weighted.RDS")
#Load in the correct projection (for some reason this is handled incorrectly otherwise)

  nasa_proj <- "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +R=6371007.181 +units=m +no_defs"

#Load the example layers
    
  list.files(path = "data/flight_planning/",
             pattern = "example_cloud_cover",
             full.names = TRUE) %>%
    rast() -> cloud_examples
  
# Generate the bounding box used
    domain_plus_boxes <-
    st_union(domain_sf,boxes_sf) %>%
      st_bbox() %>%
      st_as_sfc()


  crs(cloud_examples) <- nasa_proj
  
  cloud_examples %>%
    project(y = terra::crs(domain_sf,proj=TRUE)) %>%
    crop(y = vect(domain_plus_boxes)) -> cloud_examples

c1 <-  
cloud_examples[[1]] %>%
  as.data.frame(xy = TRUE) %>%
  mutate(clouds = as.logical(state_1km))%>%
ggplot() +
  geom_tile(aes(x = x,
                y = y,
                fill = clouds))+
  scale_fill_manual(values = c("light blue","white"))+
  geom_sf(data = domain_sf,fill=NA)+
  xlab(NULL)+
  ylab(NULL)+
  scale_x_continuous(expand = (c(0,0)))+
  scale_y_continuous(expand = c(0,0))

c2 <-  
cloud_examples[[2]] %>%
  as.data.frame(xy = TRUE) %>%
  mutate(clouds = as.logical(state_1km))%>%
ggplot() +
  geom_tile(aes(x = x,
                y = y,
                fill = clouds))+
  scale_fill_manual(values = c("light blue","white"))+
  geom_sf(data = domain_sf,fill=NA)+
  xlab(NULL)+
  ylab(NULL)+
  scale_x_continuous(expand = (c(0,0)))+
  scale_y_continuous(expand = c(0,0))

c3 <-  
cloud_examples[[3]] %>%
  as.data.frame(xy = TRUE) %>%
  mutate(clouds = as.logical(state_1km))%>%
ggplot() +
  geom_tile(aes(x = x,
                y = y,
                fill = clouds))+
  scale_fill_manual(values = c("light blue","white"))+
  geom_sf(data = domain_sf,fill=NA)+
  xlab(NULL)+
  ylab(NULL)+
  scale_x_continuous(expand = (c(0,0)))+
  scale_y_continuous(expand = c(0,0))

ggarrange(c1,c2,c3,
          common.legend = TRUE,
          ncol = 1)
Figure 1. Binary MODIS cloud data. Clouds are in white, non-clouds in blue.

Figure 1. Binary MODIS cloud data. Clouds are in white, non-clouds in blue.

Cloud Cover and Wind Speed

To visualize spatial patterns of cloud cover and wind speed, we calculated the averages for each raster cell (Figure 2). For cloud cover, to took the mean value across days (October-December) and years (2000-present). For wind speed, we took median values. We also include median wind speed estimates from FEWSNET (https://developers.google.com/earth-engine/datasets/catalog/NASA_FLDAS_NOAH01_C_GL_M_V001), which are monthly estimates at ~11km resolution.

#Pull in other bioscape layers

  boxes_sf %>%
  st_transform(crs = st_crs(4326)) -> flights

  team_requests <- st_read("data/manual_downloads/BIOSCAPE_proposed/20221014_team_polygons.gpkg",quiet = TRUE) %>%
      st_transform(crs = st_crs(4326))

  domain_sf %>%
      st_transform(crs = st_crs(4326)) -> domain_wgs84
  
  mean_cloud_cover <- raster("data/output/mean_cloud_cover.tif")
  

#Make a palette
  pal <- colorNumeric(palette = colorRamp(c("white", "blue"), interpolate = "spline"),
                      domain = unique(flights$prop_clear))
  
  pal2 <- colorNumeric(palette = "Blues",
                      domain = 0:1, #unique(values(mean_cloud_cover)),
                      reverse = TRUE)

#Get era wind data  
  median_era5_wind_speed <- raster("data/output/median_era5_speed.tif")

#Get fewsnet wind data  
  median_fewsnet_wind_speed <- raster("data/output/median_wind_fewsnet.tif")

#Make a palette
  pal_era5 <- colorNumeric(palette = colorRamp(c("white", "magenta"), interpolate = "spline"),
                      domain = unique(c(values(median_era5_wind_speed),values(median_fewsnet_wind_speed))),na.color = NA)

#Make labels
labels <- sprintf(as.character(flights$prop_clear)) %>%
  lapply(htmltools::HTML)


  boxes_sf %>%
  st_transform(crs = st_crs(4326)) %>%
leaflet() %>%
  addProviderTiles("Esri.NatGeoWorldMap", group = "NatGeo") %>%
  addProviderTiles(providers$Esri.WorldImagery, group = "World Imagery") %>%
  addMapPane("flights", zIndex = 420) %>%
  addMapPane("requests", zIndex = 410) %>% 
  addRasterImage(x = mean_cloud_cover,
                 group = "Clouds",
                 colors = pal2,
                 opacity = 1) %>%
      addRasterImage(x = median_era5_wind_speed,
                 group = "ERA5 Wind",
                 colors = pal_era5,
                 opacity = 1) %>%
      addRasterImage(x = median_fewsnet_wind_speed,
                 group = "FEWSNET Wind",
                 colors = pal_era5,
                 opacity = 1) %>%
     addPolygons(stroke = TRUE,
              group = "Flights",
              color = "black",
              opacity = 1,
              weight = 1,
              label = ~as.character(ordered_id),
              labelOptions = labelOptions(noHide = T,
                                          textOnly = T,
                                          textsize = 3),
              options = pathOptions(pane = "flights"),
              fill = FALSE)%>%
      addPolygons(data = team_requests%>%
                st_zm(drop = T, what = "ZM"),
                  stroke = TRUE,
                  color = "black",
                  group = "Requests",
              options = pathOptions(pane = "requests"),
              fill = FALSE,
              weight = 1)%>%  
    addMouseCoordinates() %>%
    #addImageQuery(sampling_options_wgs84, type="mousemove", layerId = "park_name") %>%
  leaflet::addLegend(position = "bottomright",
            pal = pal2,
            values = 0:1,
            opacity = 1,
            title = "Mean\nCloud\nCover") %>%
    leaflet::addLegend(position = "bottomleft",
            pal = pal_era5,
            values = min(na.omit(c(values(median_era5_wind_speed),values(median_fewsnet_wind_speed)))):max(na.omit(c(values(median_era5_wind_speed),values(median_fewsnet_wind_speed)))),
            opacity = 1,
            title = "Median\nWind\nSpeed",) %>%
    addLayersControl(
    baseGroups = c("World Imagery","NatGeo"),
    overlayGroups = c("Flights","Requests","Clouds","ERA5 Wind", "FEWSNET Wind"),
    options = layersControlOptions(collapsed = FALSE),
    position = "topright") %>%
  hideGroup(c("Requests","ERA5 Wind", "FEWSNET Wind"))

Figure 2. Mean cloud cover. Whiter raster cells are cloudier, bluer cells are clearer. Boxes shown are flight boxes, labeled with a unique ID. Polygons are the requested sampling regions.

Cloud cover over time

To visualize temporal patterns in cloud cover, we calculated the mean cloud cover for each flight box (Figure 3).

  #ID by day of year
  
    cloud_table %>%
        mutate(date = as.Date(day_of_year,origin="2022-12-31"))%>%
        mutate(start_of_month = floor_date(date,unit = "month"),
               month_label = month(start_of_month,label = TRUE),
               julian_label = yday(start_of_month),
               day_of_month = mday(date),
               md_label = paste(month_label,"-",day_of_month,sep = "")) %>%
      group_by(id,day_of_year) %>%
      left_join(y = boxes_sf %>%
      st_drop_geometry()) -> temp
      

    { temp %>%
      ggplot() +
      geom_tile(mapping = aes(x = day_of_year,
                              y = ordered_id,
                              fill = mean))+
      scale_fill_gradient(low = "sky blue",
                          high = "white")+
      facet_wrap(~year)+
        scale_x_continuous(breaks = temp$julian_label,
                           labels = temp$month_label)+
      labs(fill="Mean\nCloud\nCover",
           y="Flight Box ID",
           x = "Day of Year")+
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))} %>%
  ggplotly()

Figure 3. Rows represent different flight boxes (see Fig 2.), columns different days.

Proportion of clear days for each flight box

Here, we define a “clear” day as one with a mean cloud cover of less than 10 percent.

    #prop clear days
    {cloud_table %>%
      na.omit()%>%
      filter(month != 9)%>%
      mutate(binary_clear = dplyr::if_else(mean <= .1,true = 1,false = 0)) %>%
      group_by(id)%>%
      summarize(prop_clear = sum(binary_clear)/n(),
                clear_days = sum(binary_clear),
                total_days = n())%>%
      inner_join(x = boxes_sf)%>%
      ggplot(mapping = aes(fill = prop_clear))+
      geom_sf()+
      geom_sf(data = domain,inherit.aes = FALSE,fill=NA)+
      scale_fill_gradient(low = "white",high = "sky blue",limits=c(0,1))+
      geom_sf_text(aes(label = round(prop_clear,digits = 2)))+
      labs(fill = "Prop.\nClear",
           x=NULL,
           y=NULL)} %>%
  ggplotly()

Figure 6. Proportion of clear days (mean cloud cover < 10% ) for each flight box. Values in boxes are the proportion of clear days for that box

Campaign Simulations

In order to estimate how successful our campaign might be, we conducted a simulation using the MODIS aqua and terra cloud data.

  1. Calculate mean cloud cover for each flight box for each day in the temporal window of interest (October-December, 2000 to present).

  2. Rank flight boxes in descending order of cloud cover.

  3. For each day in time time series:

  1. If there are at least 60 days in which to sampling, continue. Else, skip.
  2. Sample the highest ranking (i.e. hardest to sample) site that is below 5% cloud cover (if any) and hasn’t been sampled.
  3. Repeat b until all sites have been sampled or no more time remains
  #code underlying simulations available in the file "R/flight_window.R"

   
      #readRDS("data/temp/sim_output_10pct.RDS")%>%
      readRDS("data/temp/sim_output_05pct.RDS")%>%
        #readRDS("data/temp/sim_output_01pct.RDS")%>%  
        na.omit()%>%
        group_by(start_date)%>%
        summarise(sites_done = sum(!is.na(box_id)),
                  mean_cc = mean(na.omit(cloud_cover)),
                  days_taken = max(date)-min(start_date)+1)%>%
        ungroup()%>%
        mutate(day_of_year = yday(as_date(start_date)),
               year = year(as_date(start_date)))%>%
        group_by(day_of_year)%>%
        summarise(q0 = quantile(days_taken,probs=0),
                  q25 = quantile(days_taken,probs=0.05),
                  q50 = quantile(days_taken,probs=0.5),
                  q75 = quantile(days_taken,probs=0.95),
                  q1 = quantile(days_taken,probs=1)
        ) %>%
        mutate(date = as.Date(day_of_year,origin="2022-12-31"))%>%
        mutate(start_of_month = floor_date(date,unit = "month"),
               month_label = month(start_of_month,label = TRUE),
               julian_label = yday(start_of_month),
               day_of_month = mday(date),
               md_label = paste(month_label,"-",day_of_month,sep = ""))->test
      
      test %>%    
        ggplot()+
        geom_ribbon(aes(ymin=q0,ymax=q1, x = day_of_year),col="grey",alpha=0.2)+
        geom_ribbon(aes(ymin=q25,ymax=q75, x = day_of_year),col="grey",alpha=0.5)+
        geom_line(aes(x=day_of_year,y=q50))+
        geom_hline(yintercept = 37,lty=2)+
        scale_x_continuous(breaks = test$day_of_year,
                           labels = test$md_label)+ #note: it seems like it should be possible to inherit these
        ylab("Flight Days Needed")+
        xlab("Campaign Starting Day")+
        geom_text(label = "estimated total number of flight days",
                  y=37.3,
                  x=280)+
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) 
Figure 6. Estimated length of time needed to finish campaign. The dark grey area represents the 50% CI, the light gray the  90% CI, and the solid line represents the median.

Figure 6. Estimated length of time needed to finish campaign. The dark grey area represents the 50% CI, the light gray the 90% CI, and the solid line represents the median.